INTRODUCTION

This report is intended to explore some ideas about LEMI dataset accessed with the lemis R package. LEMIS wildlife trade data trends from 2000 through 2014. You can read more about this dataset Here

#Load Libraries: p_load can install, load,  and update packages
pacman::p_load(rstudioapi,dplyr, ggplot2, lubridate, devtools, tidyr,magrittr, 
               lemis,zoo,gdata, knitr, DT, plotly)

# Set working directory 
path <- getwd()
knitr::opts_knit$set(root.dir = normalizePath(path.expand(path), 
                                              winslash = ("/"), 
                                              mustWork = TRUE))

# Reading dataset 
data<-readRDS("../data/data.rds")

# Transform some variables to factor/numeric/datetime
data %<>% mutate_at(c("control_number", "species_code", "taxa", "class", "genus",
                      "species", "subspecies", "specific_name", "generic_name",
                      "description", "country_origin", "country_imp_exp", 
                      "purpose","source", "action", "disposition", 
                      "disposition_year", "shipment_year", "import_export",
                      "port", "us_co", "foreign_co", "unit"), as.factor)

# Remove scientific notation
options(scipen=999)
rm(path)

A. THINGS TO START LOOKING AT/THINKING ABOUT

1) Excluding plants and microorganisms based on the kingdom level

We classified 99.23 % of data by kingdoms using the class, taxa, genus, generic name, species and description columns. This way, we excluded those elements belonging to fungi, plantae, bacteria, chromista and unspecified kingdoms.

Based just on the kingdom designation, we were able to classify 92.67% of data

  • Bacterias: Gammaproteobacteria

  • Chromista: Phaeophyceae

  • Plantae: Cycadopsida, Liliopsida, Magnoliopsida, Pinopsida, Polypodiopsida

  • Fungi: Agaricomycetes, Chytridiomycetes

  • Animalia: Actinopterygii, Amphibia, Anthozoa, Arachnida, Ascidiacea, Asteroidea, Aves, Bivalvia, Branchiopoda, Calcarea, Cephalaspidomorphi, Cephalopoda, Cestoda, Chilopoda, Clitellata, Cubozoa, Demospongiae, Diplopoda, Echinoidea, Elasmobranchii, Enteropneusta, Eurotatoria,Gastropoda, Gymnolaemata, Hexactinellida, Hexanauplia, Holocephali, Holothuroidea, Hoplonemertea, Hydrozoa, Insecta, Leptocardii, Malacostraca, Mammalia, Maxillopoda,Merostomata, Myxini, Ophiuroidea, Ostracoda, Pilidiophora, Polychaeta, Polyplacophora, Pycnogonida, Reptilia, Sarcopterygii, Scaphopoda, Scyphozoa, Secernentea, Sipunculidea, Tentaculata, Thaliacea, Trematoda

# Classifying classes by kingdom 
classes<-data %>% drop_na(class) %>% group_by(class) %>% summarise(N=n())

animalia_kingdom <- c("Actinopterygii", "Amphibia", "Anthozoa", "Arachnida",
                      "Ascidiacea", "Asteroidea", "Aves", "Bivalvia",
                      "Branchiopoda", "Calcarea", "Cephalaspidomorphi",
                      "Cephalopoda", "Cestoda", "Chilopoda", "Clitellata",
                      "Crinoidea", "Cubozoa", "Demospongiae", "Diplopoda",
                      "Echinoidea", "Elasmobranchii", "Enteropneusta",
                      "Eurotatoria", "Gastropoda", "Gymnolaemata",
                      "Hexactinellida","Hexanauplia", "Holocephali",
                      "Holothuroidea", "Hoplonemertea", "Hydrozoa", "Insecta",
                      "Leptocardii", "Malacostraca", "Mammalia", "Maxillopoda",
                      "Merostomata", "Myxini", "Ophiuroidea", "Ostracoda",
                      "Pilidiophora", "Polychaeta", "Polyplacophora",
                      "Pycnogonida", "Reptilia", "Sarcopterygii", "Scaphopoda",
                      "Scyphozoa", "Secernentea", "Sipunculidea", "Tentaculata",
                      "Thaliacea", "Trematoda")

fungi_kingdom <-c("Agaricomycetes", "Chytridiomycetes")

plantae_kingdom<- c("Cycadopsida", "Liliopsida", "Magnoliopsida", "Pinopsida", 
                    "Polypodiopsida", "Ulvophyceae")

bacterias_kingdom <- c("Gammaproteobacteria")

chromista_kingdom <- c("Phaeophyceae")
# Adding kingdom level based on class
data$kingdom<- ifelse(data$class %in% animalia_kingdom, "Animalia",
               ifelse(data$class %in% fungi_kingdom, "Fungi",        
               ifelse(data$class %in% plantae_kingdom, "Plantae", 
               ifelse(data$class %in% bacterias_kingdom, "Bacteria",                 
               ifelse(data$class %in% chromista_kingdom, "Chromista", "Other"                              )))))

rm(classes, animalia_kingdom, bacterias_kingdom, chromista_kingdom,
   plantae_kingdom, fungi_kingdom)

# Adding kingdom level based on genus
data$kingdom[which(data$genus=="Other live inverts" | data$genus=="Animals" |
                   data$genus=="Corals"| data$genus=="Dugesia"|data$genus=="Nemertea" |
                   data$genus=="Mollusca" | data$genus=="Xenoturbella" |
                   data$genus=="Chondrichthyes" | data$genus=="Chordata" | 
                   data$genus=="Paracatenula" |  data$genus=="Porifera")] <- "Animalia"
                                                         # 403,596  --> 299,674

# Adding kingdom level based on taxa
data$kingdom[which(data$taxa=="crustacean" | data$taxa=="fish" | data$taxa=="coral"|
                   data$taxa=="shell")] <- "Animalia"   # 299,673  --> 58,883

data$kingdom[which(data$taxa=="plant")] <- "Plantae"    #  58,883   --> 58,855

# Adding kingdom level based on generic name
data$kingdom[which(data$generic_name=="OTHER INVERTEBRATE" |   
                   data$generic_name=="SHELL" |
                   data$generic_name=="CORAL" | 
                   data$generic_name=="WORM")] <- "Animalia"  # 58,855 <- 40,277

# Adding kingdom level based on species
data$kingdom[which(data$species=="freshwater fish" | 
                   data$species=="invertebrates")] <- "Animalia" # 40,277 <- 40,275

# Adding kingdom level based on description
data$kingdom[which(data$description=="BOC" | data$description=="BOD" |
                   data$description=="BON" | data$description=="BOP" |
                   data$description=="CAP" | data$description=="CAV" | 
                   data$description=="CLA" | data$description=="CLA" | 
                   data$description=="MEA" | data$description=="EGG" | 
                   data$description=="EGL" | data$description=="FEA" | 
                   data$description=="FIG" | data$description=="IVC" | 
                   data$description=="LPL" | data$description=="LPS" |
                   data$description=="PLA" | data$description=="SKE" |
                   data$description=="SKI" | data$description=="SKP" |
                   data$description=="SKU" | data$description=="TEE" |
                   data$description=="TRO")] <- "Animalia" # 40,275 <- 32,048

data$kingdom[which(data$description=="CUT" | data$description=="DPL" |
                   data$description=="LOG" | data$description=="LVS" |
                   data$description=="WPR")] <- "Plantae" 
                                                                # 32,048 <- 32, 020

# Let's exclude those elements from fungi, plantae, bacteria, chromista and unspecified kingdoms. 
data <- data %>% filter(kingdom=="Animalia") 
data <- data %>% filter(taxa!="plant") 

2) Distinct species in the dataset?

It seems that there are 8,083 different species. We decided to exclude some elements like “freshwater sp.”, “including goldfish”, “marine sp”, “in trop fish &” and “sp”, due to they are not specific species.

  • The top 15 represents 26.53 % of data

  • The top 50 represents 41.46 % of data

data_species<- data %>% filter(species!="(freshwater sp.)" & 
                          species!= "(including goldfish)" &
                          species!="(marine sp.)" & 
                          species != "sp." & species != "in trop fish &")
                            
# n_distinct(data_species$species, na.rm = TRUE) # <- 8,083

species <-data_species %>% group_by(species) %>%
  summarise(total=n(), percentage= round(n()/nrow(data) *100,2)) %>%
  drop_na(species) %>%
  arrange(desc(total)) %>% top_n(15, total) 

DT::datatable(species) %>%
  formatCurrency('total',currency = "", interval = 3, mark = ",")
ggplot(data=species, aes(x=reorder(species, -percentage), y=percentage)) + 
  geom_bar(stat="identity",fill="steelblue") +
  geom_text(aes(label=round(percentage,2)))+
  theme(axis.text.x = element_text(hjust=1, angle=60)) +
  labs(y = "Percentage",  x="species") 

rm(data_species,species)

3) Total value of the wildlife product(s) in US dollars, measured by disposition year

value_wildlifeproduct_dispyear <-data %>% 
  filter(!is.na(value))  %>%
  filter(!is.na(disposition_year))  %>%
  group_by(disposition_year) %>%
  summarise(value = sum(as.numeric(value)))

DT::datatable(value_wildlifeproduct_dispyear) %>%
  formatCurrency('value',currency = "", interval = 3, mark = ",")
ggplot(value_wildlifeproduct_dispyear, aes(x = disposition_year, y=value)) + 
  geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
  geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
  scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
  labs(y = "Dollars",  x="Disposition Year")  +
  theme(axis.text.x = element_text(hjust=1, angle=60)) 

rm(value_wildlifeproduct_dispyear)

4) Total value of the wildlife product(s) in US dollars, measured by shipment year

value_wildlifeproduct_shipyear <-data %>% 
  filter(!is.na(value))  %>%
  filter(!is.na(shipment_year))  %>%
  group_by(shipment_year) %>%
  summarise(value = sum(as.numeric(value)))

DT::datatable(value_wildlifeproduct_shipyear) %>%
  formatCurrency('value',currency = "", interval = 3, mark = ",")
ggplot(value_wildlifeproduct_shipyear, aes(x = shipment_year, y=value)) + 
  geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
  geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
  scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
  labs(y = "Dollars",  x="Shipment Year") +
  theme(axis.text.x = element_text(hjust=1, angle=60)) 

rm(value_wildlifeproduct_shipyear)

5) Quantity brought by year

quantity_year<- data %>% filter(unit=="NO" | unit=="KG") # 5,107,119 -> 5,078,227
quantity_year_summary<- quantity_year %>% group_by(shipment_year, unit) %>%
  summarise(quantity = sum(quantity)) 

# Number per year
DT::datatable(quantity_year_summary[quantity_year_summary$unit== "NO", ])  %>%
  formatCurrency('quantity',currency = "", interval = 3, mark = ",")
ggplot(quantity_year_summary[quantity_year_summary$unit== "NO", ],
       aes(x = shipment_year, y=quantity)) + 
       geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
       geom_text(aes(label = scales::comma(quantity)), vjust=-1, size=2) +
       labs(y = "Number",  x="Shipment Year") +
  theme(axis.text.x = element_text(hjust=1, angle=60)) 

# Kilograms per year
DT::datatable(quantity_year_summary[quantity_year_summary$unit== "KG", ]) %>%
  formatCurrency('quantity',currency = "", interval = 3, mark = ",")
ggplot(quantity_year_summary[quantity_year_summary$unit== "KG", ], 
       aes(x = shipment_year, y=quantity)) + 
       geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
       geom_text(aes(label = scales::comma(quantity)), vjust=-1, size=2) +
       labs(y = "Kilograms",  x="Shipment Year")  +
  theme(axis.text.x = element_text(hjust=1, angle=60)) 

rm(quantity_year, quantity_year_summary)

6) Total value of import by taxa

value_bytaxa<- data %>% group_by(taxa) %>%
  filter(!is.na(value))  %>%
  summarise(value=sum(as.numeric(value))) %>%
  arrange(desc(value))

DT::datatable(value_bytaxa) %>%
  formatCurrency('value',currency = "", interval = 3, mark = ",")
ggplot(value_bytaxa, aes(x = reorder(taxa, -value), y=value)) + 
       geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
       labs(y = "Value",  x="Taxa") +
  theme(axis.text.x = element_text(hjust=1, angle=60)) 

rm(value_bytaxa)

7) Total value of import by taxa by year

value_bytaxayear<- data %>% group_by(taxa, shipment_year) %>%
  filter(!is.na(value))  %>%
  summarise(value=sum(as.numeric(value))) 

value_bytaxayear01<- value_bytaxayear %>% filter(taxa %in% c("mammal", "shell", 
                                                              "fish",  "reptile",
                                                              "reptile", "insect", 
                                                             "bird"))

value_bytaxayear02<- value_bytaxayear %>% filter(taxa %in% c("amphibian", 
                                                              "coral", "crustacean" ,
                                                              "echinoderms",
                                                             "other"))

value_bytaxayear03<- value_bytaxayear %>% filter(taxa %in% c("annelid",
                                                             "spider"))
DT::datatable(value_bytaxayear) %>%
  formatCurrency('value',currency = "", interval = 3, mark = ",")

Here you have different graphs with some taxa elements

ggplot(value_bytaxayear01,aes(x = shipment_year, y=value, colour=taxa, group=1)) + 
       geom_line(aes(group = taxa)) +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
       labs(y = "Value",  x="Taxa") +
        theme(axis.text.x = element_text(hjust=1, angle=60)) 

ggplot(value_bytaxayear02,aes(x = shipment_year, y=value, colour=taxa, group=1)) + 
       geom_line(aes(group = taxa)) +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
       labs(y = "Value",  x="Taxa") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 

ggplot(value_bytaxayear03,aes(x = shipment_year, y=value, colour=taxa, group=1)) + 
       geom_line(aes(group = taxa)) +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
       labs(y = "Value",  x="Taxa") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 

Here you have a dynamic graph, where you can choose the specific taxa elements you want to display

value_bytaxayear %>%  
  plot_ly(x = ~ shipment_year) %>% 
  add_lines(y = ~ value, 
            color = ~ taxa,
             visible="legendonly")
rm(value_bytaxa, value_bytaxayear, value_bytaxayear01, value_bytaxayear02, value_bytaxayear03)

8) Total value of import by description

value_bydescription<- data %>% group_by(description) %>%
  filter(!is.na(value))  %>%
  summarise(value=sum(as.numeric(value))) %>%
  arrange(desc(value))

DT::datatable(value_bydescription)  %>%
  formatCurrency('value',currency = "", interval = 3, mark = ",")
value_bydescription %>% top_n(15) %>%
ggplot(aes(x = reorder(description, -value), y=value)) + 
       geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
       labs(y = "Value",  x="Description") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 
## Selecting by value

rm(value_bydescription)

9) Total value of import by description by year

value_bydescriptionyear<- data %>% group_by(description, shipment_year) %>%
  filter(!is.na(value))  %>%
  summarise(value=sum(as.numeric(value))) 

value_bydescriptionyear01<- value_bydescriptionyear %>% 
  filter(description %in% c("SPR", "GAR",  "IVC",
                            "LPS", "SHO"))

value_bydescriptionyear02<- value_bydescriptionyear %>% 
  filter(description %in% c("LIV", "MEA", "SKI",
                            "BOD", "JWL"))

value_bydescriptionyear03<- value_bydescriptionyear %>% 
  filter(description %in% c("TRI", "LPL", "IVP",
                            "SPE", "SHE"))

DT::datatable(value_bydescriptionyear)  %>%
  formatCurrency('value',currency = "", interval = 3, mark = ",")

Here you have different graphs with some descriptions

ggplot(value_bydescriptionyear01,aes(x = shipment_year, y=value, 
                                     colour=description, group=1)) + 
       geom_line(aes(group = description)) +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
       labs(y = "Value",  x="Description") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 

ggplot(value_bydescriptionyear02,aes(x = shipment_year, y=value, 
                                     colour=description, group=1)) + 
       geom_line(aes(group = description)) +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
       labs(y = "Value",  x="Description") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 

ggplot(value_bydescriptionyear03,aes(x = shipment_year, y=value, 
                                     colour=description, group=1)) + 
       geom_line(aes(group = description)) +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(value)), vjust=-1, size=2) +
       labs(y = "Value",  x="Description") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 

Here you have a dynamic graph, where you can choose the specific descriptions you want to display

value_bydescriptionyear %>%  
  plot_ly(x = ~ shipment_year) %>% 
  add_lines(y = ~ value, 
            color = ~ description,
             visible="legendonly")
rm(value_bydescriptionyear, value_bydescriptionyear01, value_bydescriptionyear02, value_bydescriptionyear03)

B. AVERAGE UNIT VALUES

10) Average value of import by taxa (for “NO” units)

avgimport_bytaxa_no<- data %>% 
  filter(unit=="NO" & quantity != 0) %>%
  mutate(import=value/quantity) %>%
  group_by(taxa) %>%
  filter(!is.na(import))  %>%
  summarise(import_mean=round(mean(as.numeric(import)),2), 
            import_median = round(median(as.numeric(import))),2) %>%
  arrange(desc(import_median)) 

DT::datatable(avgimport_bytaxa_no)  %>%
  formatCurrency('import_median',currency = "", interval = 3, mark = ",")
ggplot(avgimport_bytaxa_no, aes(x = reorder(taxa, -import_median), y=import_median)) + 
       geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(import_median)), vjust=-1, size=2) +
       labs(y = "Median Import",  x="Taxa") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 

rm(avgimport_bytaxa_no)

11) Average value of import by description (for “NO” units)

avgimport_bydescrip_no<- data %>% 
  filter(unit=="NO" & quantity != 0) %>%
  mutate(import=value/quantity) %>%
  group_by(description) %>%
  filter(!is.na(import))  %>%
  summarise(import_mean=round(mean(as.numeric(import)),2), 
            import_median = round(median(as.numeric(import))),2) %>%
  arrange(desc(import_median))

DT::datatable(avgimport_bydescrip_no)  %>%
  formatCurrency('import_median',currency = "", interval = 3, mark = ",")
avgimport_bydescrip_no %>% top_n(15, wt=import_median) %>%
ggplot(aes(x = reorder(description, -import_median), y=import_median)) + 
       geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(import_median)), vjust=-1, size=2) +
       labs(y = "Median Import",  x="Description") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 

rm(avgimport_bydescrip_no)

12) Average value of import by taxa (for “KG” units)

avgimport_bytaxa_kg<- data %>% 
  filter(unit=="KG" & quantity != 0) %>%
  mutate(import=value/quantity) %>%
  group_by(taxa) %>%
  filter(!is.na(import))  %>%
  summarise(import_mean=round(mean(as.numeric(import)),2), 
            import_median = round(median(as.numeric(import))),2) %>%
  arrange(desc(import_median))

DT::datatable(avgimport_bytaxa_kg)  %>%
  formatCurrency('import_median',currency = "", interval = 3, mark = ",")
ggplot(avgimport_bytaxa_kg, aes(x = reorder(taxa, -import_median), y=import_median)) + 
       geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(import_median)), vjust=-1, size=2) +
       labs(y = "Median Import",  x="Taxa") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 

rm(avgimport_bytaxa_kg)

13) Average value of import by description (for “KG” units)

avgimport_bydescription_kg<- data %>% 
  filter(unit=="KG" & quantity != 0) %>%
  mutate(import=value/quantity) %>%
  group_by(description) %>%
  filter(!is.na(import))  %>%
  summarise(import_mean=round(mean(as.numeric(import)),2), 
            import_median = round(median(as.numeric(import))),2) %>%
  arrange(desc(import_median))

DT::datatable(avgimport_bydescription_kg)  %>%
  formatCurrency('import_median',currency = "", interval = 3, mark = ",")
avgimport_bydescription_kg %>% top_n(15, wt=import_median) %>%
ggplot(aes(x = reorder(description, -import_median), y=import_median)) + 
       geom_bar(stat = "identity", width=0.4, position = position_dodge(width=0.5), 
           fill="blue") +
       scale_y_continuous(labels = scales::dollar_format(prefix="$")) +
       geom_text(aes(label = scales::comma(import_median)), vjust=-1, size=2) +
       labs(y = "Median Import",  x="Description") +
       theme(axis.text.x = element_text(hjust=1, angle=60)) 

rm(avgimport_bytaxa_kg)

Sources: